Relación con muertes

Autor/a

Brian Norman Peña-Calero

Fecha de publicación

19 de febrero de 2024

1 Cargar paquetes

Código
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.4.4     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
Código
library(lme4)
Loading required package: Matrix

Attaching package: 'Matrix'

The following objects are masked from 'package:tidyr':

    expand, pack, unpack
Código
library(lmerTest)

Attaching package: 'lmerTest'

The following object is masked from 'package:lme4':

    lmer

The following object is masked from 'package:stats':

    step
Código
library(merDeriv)
Loading required package: nonnest2
This is nonnest2 0.5-6.
nonnest2 has not been tested with all combinations of supported model classes.
Loading required package: sandwich
Loading required package: lavaan
This is lavaan 0.6-17
lavaan is FREE software! Please report any bugs.
Código
library(performance)
library(parameters)
# options(scipen = 9)

options(parameters_labels = TRUE) 

2 Importar datos

Código
gbmt_fit_final <- readRDS("01_data/processed/gbmt_fit_final.rds")
deaths_by_year <- read_csv(file = "01_data/processed/deaths_by_year.csv")
Rows: 6945 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (1): ISO2
dbl (3): SALID1, YEAR, deaths

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

3 Formato de datos

Código
gbmt_log_3 <- gbmt_fit_final %>% 
  filter(scale_name == "logarithmic", ng == 3) %>% 
  slice(1)

gbmt_log_3 <- gbmt_log_3$gbmt_fit_total[[1]]


gbmt_log_4 <- gbmt_fit_final %>% 
  filter(scale_name == "logarithmic", ng == 4) %>% 
  slice(1)

gbmt_log_4 <- gbmt_log_4$gbmt_fit_total[[1]]


gbmt_log <- gbmt_log_3 %>% 
  bind_rows(gbmt_log_4,
            .id = "Clusters") %>% 
  mutate(Clusters = case_match(Clusters,
                               "1" ~ 3,
                               "2" ~ 4))

Unificar datos:

Código
gbmt_log_merged <- gbmt_log %>%
  select(Clusters, ISO2, SALID1, YEAR = year, population_imp_norm:group) %>% 
  inner_join(
    deaths_by_year
  ) %>% 
  mutate(deaths_per_100k = (deaths / population_imp) * 100000)
Joining with `by = join_by(ISO2, SALID1, YEAR)`

4 Tabla descriptiva

Código
gbmt_log_descriptive <- gbmt_log %>%
  filter(Clusters == 4) %>% 
  select(country, ISO2, SALID1, YEAR = year, population_imp:group) %>% 
  inner_join(
    deaths_by_year
  ) %>% 
  mutate(
    country = str_to_title(country),
    deaths_per_100k = (deaths / population_imp) * 100000
  ) %>% 
  select(-c(ISO2, SALID1, YEAR, deaths)) %>% 
  relocate(group, .after = deaths_per_100k)
Joining with `by = join_by(ISO2, SALID1, YEAR)`
Código
labelled::var_label(gbmt_log_descriptive) <- list(
  country = "Country",
  population_imp = "Population",
  bectuareal1ux_imp = "Urban Area",
  deaths_per_100k    = "Deaths per 100K",
  group = "Clusters"
)
Código
library(gtsummary)
tab1 <- gbmt_log_descriptive %>% 
  tbl_summary(
    by = group,
    percent = "row"
  ) %>% 
  modify_header(label = "") %>%
  bold_labels() %>% 
  add_overall(last = TRUE)

tab1
1, N = 1,7491 2, N = 9641 3, N = 1,0591 4, N = 2141 Overall, N = 3,9861
Country




    Argentina 49 (50%) 14 (14%) 35 (36%) 0 (0%) 98 (100%)
    Brazil 1,200 (54%) 576 (26%) 384 (17%) 48 (2.2%) 2,208 (100%)
    Chile 128 (44%) 64 (22%) 64 (22%) 32 (11%) 288 (100%)
    Colombia 190 (38%) 57 (12%) 209 (42%) 38 (7.7%) 494 (100%)
    Costa Rica 16 (100%) 0 (0%) 0 (0%) 0 (0%) 16 (100%)
    El Salvador 15 (33%) 15 (33%) 15 (33%) 0 (0%) 45 (100%)
    Guatemala 7 (100%) 0 (0%) 0 (0%) 0 (0%) 7 (100%)
    Mexico 144 (18%) 224 (27%) 352 (43%) 96 (12%) 816 (100%)
    Panama 0 (0%) 14 (100%) 0 (0%) 0 (0%) 14 (100%)
Population 196,337 (139,475, 274,233) 140,618 (120,151, 208,516) 414,158 (211,435, 850,431) 176,715 (134,794, 253,010) 200,752 (135,821, 349,456)
Urban Area 2,742 (1,486, 5,111) 1,726 (974, 2,953) 5,706 (2,691, 10,468) 1,176 (678, 3,384) 2,733 (1,442, 5,966)
Deaths per 100K 765 (639, 1,063) 706 (575, 997) 562 (457, 779) 574 (490, 940) 701 (553, 942)
1 n (%); Median (IQR)
Código
tab2 <- gbmt_log_descriptive %>% 
  tbl_summary(
    by = country,
    percent = "row"
  ) %>% 
  modify_header(label = "") %>%
  bold_labels() %>% 
  add_overall(last = TRUE)

tab2
Argentina, N = 981 Brazil, N = 2,2081 Chile, N = 2881 Colombia, N = 4941 Costa Rica, N = 161 El Salvador, N = 451 Guatemala, N = 71 Mexico, N = 8161 Panama, N = 141 Overall, N = 3,9861
Population 281,699 (127,245, 555,445) 186,154 (130,094, 302,125) 180,732 (143,769, 232,177) 235,196 (141,438, 366,436) 294,932 (287,559, 302,885) 252,434 (247,207, 303,155) 787,981 (786,722, 789,242) 244,510 (141,902, 870,984) 467,866 (464,203, 472,790) 200,752 (135,821, 349,456)
Urban Area 4,330 (3,327, 11,983) 2,616 (1,540, 4,918) 1,547 (981, 2,373) 1,072 (451, 2,189) 31,222 (30,766, 31,689) 1,154 (195, 11,561) 20,671 (20,520, 20,821) 5,055 (2,678, 11,149) 13,553 (12,304, 14,368) 2,733 (1,442, 5,966)
Deaths per 100K 1,158 (807, 2,390) 697 (583, 856) 662 (587, 750) 612 (517, 851) 3,290 (2,950, 3,582) 719 (531, 2,581) 2,293 (2,199, 2,405) 791 (472, 1,266) 1,539 (1,373, 1,711) 701 (553, 942)
Clusters









    1 49 (2.8%) 1,200 (69%) 128 (7.3%) 190 (11%) 16 (0.9%) 15 (0.9%) 7 (0.4%) 144 (8.2%) 0 (0%) 1,749 (100%)
    2 14 (1.5%) 576 (60%) 64 (6.6%) 57 (5.9%) 0 (0%) 15 (1.6%) 0 (0%) 224 (23%) 14 (1.5%) 964 (100%)
    3 35 (3.3%) 384 (36%) 64 (6.0%) 209 (20%) 0 (0%) 15 (1.4%) 0 (0%) 352 (33%) 0 (0%) 1,059 (100%)
    4 0 (0%) 48 (22%) 32 (15%) 38 (18%) 0 (0%) 0 (0%) 0 (0%) 96 (45%) 0 (0%) 214 (100%)
1 Median (IQR); n (%)
Código
tab1 %>%
  as_flex_table() %>%
  flextable::save_as_docx(path = "./02_output/tables/descriptive1.docx")

tab2 %>%
  as_flex_table() %>%
  flextable::save_as_docx(path = "./02_output/tables/descriptive2.docx")

5 Gráfico inicial de promedio de muertes

Código
gbmt_log_merged %>% 
  group_by(Clusters, group, YEAR) %>% 
  summarise(
    deaths_per_100k = mean(deaths_per_100k)
  ) %>% 
  ungroup() %>% 
  ggplot(aes(x = YEAR, y = deaths_per_100k, color = as.factor(group))) +
  geom_line(aes(group = group)) +
  geom_point() +
  
  labs(title = "Promedio de Muertes por 100,000 habitantes por Grupo a lo largo del Tiempo",
       x = "Año", y = "Muertes por 100,000 habitantes", color = "Grupo") +
  facet_wrap(vars(Clusters)) +
  theme_minimal()
`summarise()` has grouped output by 'Clusters', 'group'. You can override using
the `.groups` argument.

Código
ggsave(
  filename = "02_output/plots/promedio_muertes_group_log.png",
  dpi = 300,
  bg = "white"
)

6 Modelo lineal mixto

6.1 Modelo 1 (sin offset)

6.1.1 Lineal

Código
model_3 <- lmer(
  deaths_per_100k ~ YEAR * group + (1|SALID1), 
  data = gbmt_log_merged %>% filter(Clusters == 3)
)

performance(model_3) %>% print_html()
AIC AICc BIC R2_conditional R2_marginal ICC RMSE Sigma
51062.24 51062.28 51112.57 0.98 0.05 0.98 115.89 119.79
Código
parameters(model_3) %>% print_html()
Model Summary
Parameter Coefficient SE 95% CI t(3978) p
Fixed Effects
(Intercept) -15204.66 1535.92 (-18215.92, -12193.39) -9.90 < .001
YEAR 7.94 0.76 (6.44, 9.43) 10.39 < .001
group (2) -19921.91 1889.31 (-23626.03, -16217.80) -10.54 < .001
group (3) -4769.63 2522.78 (-9715.70, 176.44) -1.89 0.059
YEAR * group (2) 10.08 0.94 (8.24, 11.93) 10.73 < .001
YEAR * group (3) 2.40 1.25 (-0.06, 4.86) 1.91 0.056
Random Effects
SD (Intercept: SALID1) 753.94
SD (Residual) 119.79
Código
parameters(model_3, standardize = "basic") %>% print_html()
Standardizing coefficients only works for fixed effects of the mixed
  model.
Model Summary
Parameter Std. Coef. SE 95% CI t(3978) p
(Intercept) 0.00 0.00 (0.00, 0.00) -9.90 < .001
YEAR 0.05 5.19e-03 (0.04, 0.06) 10.39 < .001
group (2) -14.12 1.34 (-16.75, -11.49) -10.54 < .001
group (3) -2.53 1.34 (-5.15, 0.09) -1.89 0.059
YEAR * group (2) 14.35 1.34 (11.73, 16.97) 10.73 < .001
YEAR * group (3) 2.56 1.34 (-0.06, 5.17) 1.91 0.056
Código
model_4 <- lmer(
  deaths_per_100k ~ YEAR * group + (1|SALID1), 
  data = gbmt_log_merged %>% filter(Clusters == 4)
)

performance(model_4) %>% print_html()
AIC AICc BIC R2_conditional R2_marginal ICC RMSE Sigma
51031.21 51031.27 51094.12 0.98 0.04 0.98 115.56 119.47
Código
parameters(model_4) %>% print_html()
Model Summary
Parameter Coefficient SE 95% CI t(3976) p
Fixed Effects
(Intercept) -29967.87 1235.35 (-32389.85, -27545.89) -24.26 < .001
YEAR 15.42 0.61 (14.22, 16.63) 25.11 < .001
group (2) -9544.70 2079.32 (-13621.32, -5468.07) -4.59 < .001
group (3) 16889.04 1990.68 (12986.19, 20791.90) 8.48 < .001
group (4) 11795.16 3654.48 (4630.33, 18959.99) 3.23 0.001
YEAR * group (2) 4.78 1.03 (2.75, 6.80) 4.62 < .001
YEAR * group (3) -8.55 0.99 (-10.49, -6.61) -8.64 < .001
YEAR * group (4) -6.02 1.82 (-9.59, -2.46) -3.32 < .001
Random Effects
SD (Intercept: SALID1) 757.19
SD (Residual) 119.47
Código
parameters(model_4, standardize = "basic") %>% print_html()
Standardizing coefficients only works for fixed effects of the mixed
  model.
Model Summary
Parameter Std. Coef. SE 95% CI t(3976) p
(Intercept) 0.00 0.00 (0.00, 0.00) -24.26 < .001
YEAR 0.10 4.17e-03 (0.10, 0.11) 25.11 < .001
group (2) -5.84 1.27 (-8.33, -3.34) -4.59 < .001
group (3) 10.65 1.26 (8.19, 13.11) 8.48 < .001
group (4) 3.80 1.18 (1.49, 6.10) 3.23 0.001
YEAR * group (2) 5.86 1.27 (3.38, 8.35) 4.62 < .001
YEAR * group (3) -10.83 1.25 (-13.29, -8.37) -8.64 < .001
YEAR * group (4) -3.89 1.17 (-6.19, -1.59) -3.32 < .001

6.1.2 Poisson

Código
model_3_poisson <- glmer(
  deaths ~ YEAR * group + (1|SALID1), 
  data = gbmt_log_merged %>% filter(Clusters == 3),
  family = poisson(link = "log")
)
Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
 - Rescale variables?;Model is nearly unidentifiable: large eigenvalue ratio
 - Rescale variables?
Código
performance(model_3_poisson) %>% print_html()
AIC AICc BIC R2_conditional R2_marginal ICC RMSE Sigma Score_log Score_spherical
93071.60 93071.62 93115.63 1.00 0.07 1.00 518.86 1 -11.31 0.01
Código
parameters(model_3_poisson) %>% print_html()
Model Summary
Parameter Coefficient SE 95% CI z p
Fixed Effects
(Intercept) -28.20 0.21 (-28.61, -27.78) -132.98 < .001
YEAR 0.02 8.81e-05 (0.02, 0.02) 204.21 < .001
group (2) -3.42 0.27 (-3.94, -2.90) -12.85 < .001
group (3) -15.05 0.50 (-16.04, -14.07) -29.87 < .001
YEAR * group (2) 1.53e-03 1.12e-04 (1.31e-03, 1.75e-03) 13.70 < .001
YEAR * group (3) 7.12e-03 2.32e-04 (6.67e-03, 7.58e-03) 30.67 < .001
Random Effects
SD (Intercept: SALID1) 0.97
Código
parameters(model_3_poisson, exponentiate = TRUE) %>% print_html()
Model Summary
Parameter Coefficient SE 95% CI z p
Fixed Effects
(Intercept) 5.67e-13 1.20e-13 (3.74e-13, 8.59e-13) -132.98 < .001
YEAR 1.02 8.97e-05 (1.02, 1.02) 204.21 < .001
group (2) 0.03 8.71e-03 (0.02, 0.06) -12.85 < .001
group (3) 2.90e-07 1.46e-07 (1.08e-07, 7.79e-07) -29.87 < .001
YEAR * group (2) 1.00 1.12e-04 (1.00, 1.00) 13.70 < .001
YEAR * group (3) 1.01 2.34e-04 (1.01, 1.01) 30.67 < .001
Random Effects
SD (Intercept: SALID1) 0.97
Código
model_4_poisson <- glmer(
  deaths ~ YEAR * group + (1|SALID1), 
  data = gbmt_log_merged %>% filter(Clusters == 4),
  family = poisson(link = "log")
)
Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
 - Rescale variables?;Model is nearly unidentifiable: large eigenvalue ratio
 - Rescale variables?
Código
performance(model_4_poisson) %>% print_html()
AIC AICc BIC R2_conditional R2_marginal ICC RMSE Sigma Score_log Score_spherical
91102.05 91102.09 91158.66 1.00 0.08 1.00 505.56 1 -11.07 0.01
Código
parameters(model_4_poisson) %>% print_html()
Model Summary
Parameter Coefficient SE 95% CI z p
Fixed Effects
(Intercept) -28.75 0.17 (-29.09, -28.41) -165.04 < .001
YEAR 0.02 7.40e-05 (0.02, 0.02) 245.19 < .001
group (2) -17.82 0.37 (-18.55, -17.09) -47.76 < .001
group (3) 0.12 0.28 (-0.42, 0.66) 0.45 0.655
group (4) -10.50 0.78 (-12.02, -8.98) -13.50 < .001
YEAR * group (2) 8.66e-03 1.69e-04 (8.32e-03, 8.99e-03) 51.07 < .001
YEAR * group (3) 6.76e-05 1.16e-04 (-1.59e-04, 2.95e-04) 0.58 0.559
YEAR * group (4) 4.98e-03 3.61e-04 (4.27e-03, 5.68e-03) 13.79 < .001
Random Effects
SD (Intercept: SALID1) 0.96
Código
parameters(model_4_poisson, exponentiate = TRUE) %>% print_html()
Model Summary
Parameter Coefficient SE 95% CI z p
Fixed Effects
(Intercept) 3.27e-13 5.70e-14 (2.32e-13, 4.60e-13) -165.04 < .001
YEAR 1.02 7.53e-05 (1.02, 1.02) 245.19 < .001
group (2) 1.82e-08 6.79e-09 (8.76e-09, 3.78e-08) -47.76 < .001
group (3) 1.13 0.31 (0.66, 1.94) 0.45 0.655
group (4) 2.75e-05 2.14e-05 (6.00e-06, 1.26e-04) -13.50 < .001
YEAR * group (2) 1.01 1.71e-04 (1.01, 1.01) 51.07 < .001
YEAR * group (3) 1.00 1.16e-04 (1.00, 1.00) 0.58 0.559
YEAR * group (4) 1.00 3.63e-04 (1.00, 1.01) 13.79 < .001
Random Effects
SD (Intercept: SALID1) 0.96

6.2 Modelo 2 - Con offset

6.2.1 Estimación

Estimación de los modelos con la información poblacional como offset

Código
model_3_poisson_off <- glmer(
  deaths ~ YEAR * group + (1|SALID1)  + offset(log(population_imp)), 
  data = gbmt_log_merged %>% filter(Clusters == 3),
  family = poisson(link = "log")
)
Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
Model failed to converge with max|grad| = 0.00333122 (tol = 0.002, component 1)
Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
 - Rescale variables?;Model is nearly unidentifiable: large eigenvalue ratio
 - Rescale variables?
Código
performance(model_3_poisson_off) %>% print_html()
AIC AICc BIC R2_conditional R2_marginal ICC RMSE Sigma Score_log Score_spherical
1.26e+05 1.26e+05 1.26e+05 0.06 6.61e-03 0.05 554.63 1 -15.46 0.01
Código
parameters(model_3_poisson_off) %>% print_html()
Model Summary
Parameter Coefficient SE 95% CI z p
Fixed Effects
(Intercept) 5.66 0.19 (5.29, 6.03) 30.15 < .001
YEAR -5.37e-03 8.83e-05 (-5.54e-03, -5.19e-03) -60.79 < .001
group (2) -43.12 0.24 (-43.59, -42.66) -181.64 < .001
group (3) -36.90 0.48 (-37.84, -35.96) -76.97 < .001
YEAR * group (2) 0.02 1.12e-04 (0.02, 0.02) 193.27 < .001
YEAR * group (3) 0.02 2.33e-04 (0.02, 0.02) 79.05 < .001
Random Effects
SD (Intercept: SALID1) 0.51
Código
parameters(model_3_poisson_off, exponentiate = TRUE) %>% print_html()
Model Summary
Parameter Coefficient SE 95% CI z p
Fixed Effects
(Intercept) 286.84 53.84 (198.55, 414.39) 30.15 < .001
YEAR 0.99 8.78e-05 (0.99, 0.99) -60.79 < .001
group (2) 1.87e-19 4.44e-20 (1.17e-19, 2.98e-19) -181.64 < .001
group (3) 9.47e-17 4.54e-17 (3.70e-17, 2.42e-16) -76.97 < .001
YEAR * group (2) 1.02 1.15e-04 (1.02, 1.02) 193.27 < .001
YEAR * group (3) 1.02 2.38e-04 (1.02, 1.02) 79.05 < .001
Random Effects
SD (Intercept: SALID1) 0.51
Código
model_4_poisson_off <- glmer(
  deaths ~ YEAR * group + (1|SALID1) + offset(log(population_imp)), 
  data = gbmt_log_merged %>% filter(Clusters == 4),
  family = poisson(link = "log")
)
Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
 - Rescale variables?;Model is nearly unidentifiable: large eigenvalue ratio
 - Rescale variables?
Código
performance(model_4_poisson_off) %>% print_html()
AIC AICc BIC R2_conditional R2_marginal ICC RMSE Sigma Score_log Score_spherical
1.19e+05 1.19e+05 1.19e+05 0.06 6.75e-03 0.05 524.84 1 -14.64 9.99e-03
Código
parameters(model_4_poisson_off) %>% print_html()
Model Summary
Parameter Coefficient SE 95% CI z p
Fixed Effects
(Intercept) -33.92 0.16 (-34.22, -33.61) -216.68 < .001
YEAR 0.01 7.41e-05 (0.01, 0.01) 196.17 < .001
group (2) -19.43 0.35 (-20.11, -18.74) -55.48 < .001
group (3) 41.18 0.25 (40.70, 41.66) 167.28 < .001
group (4) 7.20 0.74 (5.75, 8.65) 9.72 < .001
YEAR * group (2) 9.64e-03 1.70e-04 (9.30e-03, 9.97e-03) 56.83 < .001
YEAR * group (3) -0.02 1.16e-04 (-0.02, -0.02) -178.48 < .001
YEAR * group (4) -3.74e-03 3.61e-04 (-4.45e-03, -3.03e-03) -10.35 < .001
Random Effects
SD (Intercept: SALID1) 0.51
Código
parameters(model_4_poisson_off, exponentiate = TRUE) %>% print_html()
Model Summary
Parameter Coefficient SE 95% CI z p
Fixed Effects
(Intercept) 1.86e-15 2.92e-16 (1.37e-15, 2.53e-15) -216.68 < .001
YEAR 1.01 7.52e-05 (1.01, 1.01) 196.17 < .001
group (2) 3.65e-09 1.28e-09 (1.84e-09, 7.25e-09) -55.48 < .001
group (3) 7.65e+17 1.88e+17 (4.72e+17, 1.24e+18) 167.28 < .001
group (4) 1337.47 991.02 (313.02, 5714.71) 9.72 < .001
YEAR * group (2) 1.01 1.71e-04 (1.01, 1.01) 56.83 < .001
YEAR * group (3) 0.98 1.14e-04 (0.98, 0.98) -178.48 < .001
YEAR * group (4) 1.00 3.60e-04 (1.00, 1.00) -10.35 < .001
Random Effects
SD (Intercept: SALID1) 0.51

Ingreso de los valores predictivos para los modelos de 3 y 4 clústers:

Código
gbmt_log_merged <- gbmt_log_merged %>% 
  group_nest(Clusters) %>% 
  mutate(
    data = map2(
      data, Clusters,
      ~ case_when(
        .y == 3 ~ mutate(
          .x, 
          predicted_deaths = predict(model_3_poisson_off, type = "response"),
        ),
        .y == 4 ~ mutate(
          .x, 
          predicted_deaths = predict(model_4_poisson_off, type = "response"),
        ),
      )
    )
  ) %>% 
  unnest()
Warning: `cols` is now required when using `unnest()`.
ℹ Please use `cols = c(data)`.
Código
gbmt_log_merged <- gbmt_log_merged %>% 
  mutate(
    predicted_deaths_100k = predicted_deaths / population_imp * 100000
  )

6.2.2 Gráfico

Debido a que se ha agregado a los interceptos de las ciudades como efecto aleatorio, entonces las estimaciones y predicciones van a variar de ciudad a ciudad. Para poder visualizarlo, se podría resumir los datos con su media.

Código
plot_poisson_offset <- gbmt_log_merged %>% 
  group_by(Clusters, group, YEAR) %>% 
  summarise(
    across(
      c(predicted_deaths_100k, 
        deaths_per_100k),
      mean
    )
  ) %>% 
  ungroup() %>% 
  ggplot(aes(x = YEAR, 
           y = predicted_deaths_100k,
           color = group)) + 
  geom_point(aes(y = deaths_per_100k), alpha = 0.5,
             position = position_jitter(h = 0.2)) +  # Datos observados
  geom_line() +
  labs(title = "Promedio de Muertes por 100,000 habitantes por Grupo",
       x = "Año", y = "Muertes esperadas por 100,000 habitantes", color = "Grupo") +
  facet_wrap(vars(Clusters)) +
  theme_minimal()
`summarise()` has grouped output by 'Clusters', 'group'. You can override using
the `.groups` argument.
Código
plot_poisson_offset

Código
ggsave(
  plot_poisson_offset,
  filename = "02_output/plots/promedio_muertes_group_poisson.png",
  dpi = 300,
  bg = "white",
  width = 7,
  height = 4.5
)
Código
gbmt_log_merged %>% 
  ggplot(aes(x = YEAR, 
           y = predicted_deaths_100k,
           color = group)) + 
  geom_point(aes(y = deaths_per_100k), alpha = 0.5,
             position = position_jitter(h = 0.2)) +  # Datos observados
  geom_line(aes(group = SALID1)) +
  labs(title = "Promedio de Muertes por 100,000 habitantes por Grupo",
       x = "Año", y = "Muertes esperadas por 100,000 habitantes", color = "Grupo") +
  facet_grid(vars(Clusters), vars(group)) +
  # facet_wrap(vars(Clusters)) +
  theme_minimal()

6.3 Modelo 3 - Con offset sin efectos aleatorios

6.3.1 Estimación

Las estimaciones no consideran variaciones entre las ciudades

Código
model_3_poisson_off2 <- glm(
  deaths ~ YEAR * group + offset(log(population_imp)), 
  data = gbmt_log_merged %>% filter(Clusters == 3),
  family = poisson(link = "log")
)

performance(model_3_poisson_off2) %>% print_html()
AIC AICc BIC R2_Nagelkerke RMSE Sigma Score_log Score_spherical
5.12e+06 5.12e+06 5.12e+06 1 5381.53 1 -Inf 2.93e-03
Código
parameters(model_3_poisson_off2) %>% print_html()
Parameter Coefficient SE 95% CI z p
(Intercept) 20.91 0.17 (20.57, 21.25) 120.50 < .001
YEAR -0.01 8.64e-05 (-0.01, -0.01) -150.15 < .001
group (2) -83.35 0.22 (-83.78, -82.93) -381.40 < .001
group (3) -58.38 0.46 (-59.28, -57.48) -127.09 < .001
YEAR * NA 0.04 1.09e-04 (0.04, 0.04) 384.71 < .001
YEAR * NA 0.03 2.29e-04 (0.03, 0.03) 127.58 < .001
Código
parameters(model_3_poisson_off2, exponentiate = TRUE) %>% print_html()
Parameter Coefficient SE 95% CI z p
(Intercept) 1.21e+09 2.10e+08 (8.62e+08, 1.70e+09) 120.50 < .001
YEAR 0.99 8.53e-05 (0.99, 0.99) -150.15 < .001
group (2) 6.30e-37 1.38e-37 (4.11e-37, 9.67e-37) -381.40 < .001
group (3) 4.41e-26 2.02e-26 (1.79e-26, 1.08e-25) -127.09 < .001
YEAR * NA 1.04 1.13e-04 (1.04, 1.04) 384.71 < .001
YEAR * NA 1.03 2.36e-04 (1.03, 1.03) 127.58 < .001
Código
model_4_poisson_off2 <- glm(
  deaths ~ YEAR * group + offset(log(population_imp)), 
  data = gbmt_log_merged %>% filter(Clusters == 4),
  family = poisson(link = "log")
)
performance(model_4_poisson_off2) %>% print_html()
AIC AICc BIC R2_Nagelkerke RMSE Sigma Score_log Score_spherical
5.19e+06 5.19e+06 5.19e+06 1 5419.27 1 -Inf 3.19e-03
Código
parameters(model_4_poisson_off2) %>% print_html()
Parameter Coefficient SE 95% CI z p
(Intercept) -61.03 0.14 (-61.31, -60.75) -430.71 < .001
YEAR 0.03 7.06e-05 (0.03, 0.03) 399.46 < .001
group (2) -4.04 0.33 (-4.69, -3.39) -12.18 < .001
group (3) 83.26 0.23 (82.82, 83.70) 369.19 < .001
group (4) 33.79 0.71 (32.39, 35.19) 47.27 < .001
YEAR * NA 1.96e-03 1.65e-04 (1.63e-03, 2.28e-03) 11.84 < .001
YEAR * NA -0.04 1.12e-04 (-0.04, -0.04) -372.41 < .001
YEAR * NA -0.02 3.56e-04 (-0.02, -0.02) -47.89 < .001
Código
parameters(model_4_poisson_off2, exponentiate = TRUE) %>% print_html()
Parameter Coefficient SE 95% CI z p
(Intercept) 3.12e-27 4.43e-28 (2.37e-27, 4.12e-27) -430.71 < .001
YEAR 1.03 7.26e-05 (1.03, 1.03) 399.46 < .001
group (2) 0.02 5.83e-03 (9.17e-03, 0.03) -12.18 < .001
group (3) 1.45e+36 3.26e+35 (9.29e+35, 2.25e+36) 369.19 < .001
group (4) 4.74e+14 3.39e+14 (1.17e+14, 1.92e+15) 47.27 < .001
YEAR * NA 1.00 1.66e-04 (1.00, 1.00) 11.84 < .001
YEAR * NA 0.96 1.08e-04 (0.96, 0.96) -372.41 < .001
YEAR * NA 0.98 3.50e-04 (0.98, 0.98) -47.89 < .001

Ingreso de las predicciones en el df:

Código
gbmt_log_merged2 <- gbmt_log_merged %>% 
  group_nest(Clusters) %>% 
  mutate(
    data = map2(
      data, Clusters,
      ~ case_when(
        .y == 3 ~ mutate(
          .x, 
          predicted_deaths = predict(model_3_poisson_off2, type = "response"),
        ),
        .y == 4 ~ mutate(
          .x, 
          predicted_deaths = predict(model_4_poisson_off2, type = "response"),
        ),
      )
    )
  ) %>% 
  unnest()
Warning: `cols` is now required when using `unnest()`.
ℹ Please use `cols = c(data)`.
Código
gbmt_log_merged2 <- gbmt_log_merged2 %>% 
  mutate(
    predicted_deaths_100k = predicted_deaths / population_imp * 100000
  )

6.3.2 Gráfico

Gráfico de los datos resumidos:

Código
gbmt_log_merged2 %>% 
  group_by(Clusters, group, YEAR) %>%
  summarise(
    across(
      c(predicted_deaths_100k,
        deaths_per_100k),
      mean
    )
  ) %>%
  ungroup() %>%
  ggplot(aes(x = YEAR, 
           y = predicted_deaths_100k,
           color = group)) + 
  geom_point(aes(y = deaths_per_100k), alpha = 0.5,
             position = position_jitter(h = 0.2)) +  # Datos observados
  geom_line(linewidth = 1) +
  labs(title = "Promedio de Muertes por 100,000 habitantes por Grupo a lo largo del Tiempo",
       x = "Año", y = "Muertes esperadas por 100,000 habitantes", color = "Grupo") +
  facet_wrap(vars(Clusters)) +
  theme_minimal()
`summarise()` has grouped output by 'Clusters', 'group'. You can override using
the `.groups` argument.

Sin resumir:

Código
gbmt_log_merged2 %>% 
  ggplot(aes(x = YEAR, 
           y = predicted_deaths_100k,
           color = group)) + 
  geom_point(aes(y = deaths_per_100k), alpha = 0.5,
             position = position_jitter(h = 0.2)) +  # Datos observados
  geom_line(linewidth = 1) +
  labs(title = "Promedio de Muertes por 100,000 habitantes por Grupo a lo largo del Tiempo",
       x = "Año", y = "Muertes esperadas por 100,000 habitantes", color = "Grupo") +
  facet_wrap(vars(Clusters)) +
  theme_minimal()